{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "ff3392cf-f1c6-4067-9213-0eecca4e70f6",
   "metadata": {},
   "source": [
    "# Use LLM for Metadata extraction\n",
    "\n",
    "This demo uses a large language model for metadata extraction from text.\n",
    "\n",
    "Uses model hosted on Fireworks.ai to simplify set up. Can also use a local or self-hosted model."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "13029c01-a035-4aae-9291-9d4f163252ce",
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip3 install openai==1.39.0 pydantic==2.8.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "88dd7fc2-05ce-47eb-9d89-9f5c6aa8c72c",
   "metadata": {},
   "outputs": [],
   "source": [
    "from ipython_secrets import get_secret\n",
    "import os\n",
    "os.environ[\"OPENAI_API_KEY\"] = get_secret('OPENAI_API_KEY')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8455e9be-fc4b-49cc-ae19-37a5c66e89a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os \n",
    "from openai import OpenAI \n",
    "from pydantic import BaseModel \n",
    "import json \n",
    " \n",
    "# Create client to call model \n",
    "api_key = os.environ[\"OPENAI_API_KEY\"] \n",
    "client = OpenAI( \n",
    "    api_key=api_key, \n",
    ") \n",
    " \n",
    "# Format response structure \n",
    "class TopicsResult(BaseModel): \n",
    "    topics: list[str] \n",
    " \n",
    "function_definition = {\n",
    "    \"name\": \"get_topics\",\n",
    "    \"description\": \"Extract the key topics from the text\",\n",
    "    \"parameters\": json.loads(TopicsResult.schema_json())\n",
    "}\n",
    "response = client.chat.completions.create( \n",
    "    model=\"gpt-4o-mini\", \n",
    "    functions=[function_definition], \n",
    "    function_call={ \"name\": function_definition[\"name\"] },\n",
    "    messages=[ \n",
    "        { \n",
    "            \"role\": \"system\", \n",
    "            \"content\": \"Extract key topics from the following text. Include no more than 3 key terms. Format response as a JSON object.\", \n",
    "        }, \n",
    "        { \n",
    "            \"role\": \"user\", \n",
    "            \"content\": \"Eggs, like milk, form a typical food, inasmuch as they contain all the elements, in the right proportion, necessary for the support of the body. Their highly concentrated, nutritive value renders it necessary to use them in combination with other foods rich in starch (bread, potatoes, etc.). In order that the stomach may have enough to act upon, a certain amount of bulk must be furnished.\" \n",
    "        } \n",
    "    ], \n",
    ") \n",
    " \n",
    "# Get model results as a dict \n",
    "content = TopicsResult.model_validate(json.loads(response.choices[0].message.function_call.arguments)) \n",
    " \n",
    "print(f\"Topics: {content.topics}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0b402fbc-a427-442b-9491-194035c4216a",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
